*--------------------------------------------------------------------------------------------------------------------	* 
* RESEARCHERS:		EARN												   						   		  				*
* PROGRAMMED BY:	Linea Hasager, Janis Kreuder.																   						*
* DESCRIPTION:		Information on municipality and age is added to the first_immigration_and_admission_latest_record 	*
*					dataset. Family-reunified to refugees are also identified. Output: dataset of refugees and family-reunified.		*
* CREATED:			Oct. 9, 2017																		   				*
* LAST MODIFIED:	May 31, 2022													       								*
*-------------------------------------------------------------------------------------------------------------------	;

/* Folders used for the imported input file and for the exported output files. */
%let input=L:\Workdata\707455\Papers\ABFHP_1999reform\Code\ReStat\Input;
%let outputdata=L:\Workdata\707455\Papers\ABFHP_1999reform\Code\ReStat\Input;

/* Register data libraries. */
libname pop "H:\Rawdata\707455\Grunddata";
libname raw "H:\Rawdata\707455\Views";

/* Project input folder (same path as the INPUT macro variable) and municipality formats. */
libname input "&input.";
libname format "D:\Formater\forskerformater\Kommuner";


/* Read the Stata file with first immigration and latest admission records into WORK.IMMI. */
proc import
    datafile="&input.\First_immigration_and_admission_latest_record.dta"
    out=work.immi
    dbms=dta
    replace;
run;



/*LOAD DATA ON AGE AND MUNICIPALITY. CHOOSE FIRST AND LAST YEAR IN BEF REGISTER*/
*CONVERT PERSON ID TO NUMERIC FORMAT;
%let first=1997;
%let last=2019;

/* %load(data): reads the view raw.<data>v into WORK.BEF.
   - keeps register years between &first. and &last. (inclusive);
   - converts the character person id to numeric (pnr);
   - sets municipality code 999 to missing;
   - on output renames <data>SourceYear to year and kom/bopikom/opgikom to *_BEF. */
%macro load(data);
data bef (drop=pnr_char rename=(&data.SourceYear=year));
set raw.&data.v (keep=pnr bopikom opgikom foed_dag kom familie_id ie_type opr_land statsb koen efalle &data.SourceYear rename=(pnr=pnr_char));
rename kom=KOM_BEF bopikom=bopikom_BEF opgikom=opgikom_bef;
if &data.SourceYear>=&first. and &data.SourceYear <= &last.;
pnr=pnr_char*1;
/* The RENAME statement only takes effect when the observation is written, so the
   variable must be addressed by its input name (kom) inside this step.
   CALL MISSING works for both numeric and character variables. */
if kom=999 then call missing(kom);
run;
%mend;
%load(befupd);



/*LOAD DATA ON INTERNAL MIGRATIONS*/
%let y=2017;

/* %load(data,year): copies pop.<data><year> to WORK._<data> without the four
   bookkeeping columns, removes rows lacking a person id or an address id,
   and orders the result by address id. */
%macro load(data,year);
data _&data.;
    set pop.&data.&year. (drop=cprtjek cprtype bop_rfra bop_ajour);
    if pnr = ' ' or adresse_id = ' ' then delete;
run;

proc sort data=_&data.;
    by adresse_id;
run;
%mend;
%load(befbop, &y.);

/* %load(data,year): copies pop.<data><year> to WORK._<data> without the adr_*
   columns, removes rows lacking an address id, and orders the result by address id. */
%macro load(data,year);
data _&data.;
    set pop.&data.&year. (drop=adr_:);
    if adresse_id = ' ' then delete;
run;

proc sort data=_&data.;
    by adresse_id;
run;
%mend;
%load(befadr, &y.);


/*MERGE INFO ON ADDRESS TO INDIVIDUAL-LEVEL INTERNAL MIGRATIONS*/
/* Only rows present in _befbop are kept. The person id is converted to numeric
   and rows with a blank or excluded municipality code are removed. */
data flyt (drop=pnr_char);
    merge _befbop (rename=(pnr=pnr_char) in=a)
          _befadr;
    by adresse_id;
    if not a then delete;
    pnr = pnr_char*1;
    if kom = " "   or kom = "004" or kom = "007" or kom = "009"
    or kom = "010" or kom = "011" or kom = "012" or kom = "019" then delete;
run;



/* Order the person-level datasets for the BY-group steps that follow. */
proc sort data=bef;  by pnr year;     run;
proc sort data=flyt; by pnr bop_vfra; run;
proc sort data=immi; by pnr year;     run;




/*USE MOST RECENT NON-MISSING INFORMATION ON BIRTHDAY, ORIGIN, AND GENDER IF VARIABLE IS MISSING OR RECORDED WITH MULTIPLE VALUES*/
/* %recentinfo(var=): creates WORK.<var> with one row per pnr holding the value of
   <var> from the latest BEF year in which it is non-missing.
   Missing values are removed with WHERE= before BY-group processing. Filtering
   with a subsetting IF inside the BY step would drop every person whose most
   recent record is missing: FIRST.pnr is set on that (deleted) record and the
   older non-missing records never satisfy FIRST.pnr.
   MISSING() is used so the macro works for numeric and character variables. */
%macro recentinfo(var=);
proc sort data=bef (keep=pnr year &var. where=(not missing(&var.))) out=&var.;
by pnr descending year;
run;

data &var.;
set &var.;
by pnr;
if first.pnr;
keep pnr &var.;
run;
%mend;
%recentinfo(var=foed_dag);
%recentinfo(var=ie_type);
%recentinfo(var=koen);
%recentinfo(var=opr_land);


/*LAG YEAR IN BEF-DATA ONE PERIOD, SO MUNICIPALITY ONE YEAR AFTER IMMIGRATION CAN BE IDENTIFIED FROM BEF*/
/*LAG YEAR IN BEF-DATA 2 PERIODS, SO MUNICIPALITY TWO YEARS AFTER IMMIGRATION CAN BE IDENTIFIED FROM BEF IF FIRST YEAR IS MISSING*/
/*LAG YEAR IN BEF-DATA 3 PERIODS, SO MUNICIPALITY THREE YEARS AFTER IMMIGRATION CAN BE IDENTIFIED FROM BEF IF FIRST AND SECOND YEARS ARE MISSING*/
/* %lag: for j = 1, 2, 3 creates WORK.BEF<j>, a copy of BEF in which
   year = BEF year - j and every carried variable gets the suffix j.
   Only rows with a resulting year after 1996 are kept. */
%macro lag;
/* Keep the loop index local so it cannot overwrite a J defined by the caller. */
%local j;
%do j=1 %to 3;
data bef&j. (drop=year&j.);
set bef (drop=foed_dag rename=(year=year&j.));
year=year&j.-&j.;
rename statsb=statsb&j. kom_bef=KOM_bef&j. bopikom_bef=bopikom_bef&j. opgikom_bef=opgikom_bef&j. ie_type=ie_type&j. familie_id=familie_id&j. opr_land=opr_land&j. efalle=efalle&j.;
if year ne . and year > 1996;
label year='Year of entry';
run;
%end;
%mend;
%lag;


/*SORT DATA*/
proc sort data=immi; by pnr; run;
proc sort data=foed_dag; by pnr; run;
proc sort data=ie_type; by pnr; run;
proc sort data=koen; by pnr; run;
proc sort data=opr_land; by pnr; run;


/* CALCULATE AGE AT DATE OF IMMIGRATION AND ADD ORIGIN*/
/* Adds the most recent birthday, ie_type, gender and country of origin to every
   immigration record. Only persons present in IMMI are kept. */
data immi;
merge immi (in=a) foed_dag ie_type koen opr_land;
by pnr;
if a;
/* The third argument of YRDIF is the basis and must be the character value 'AGE'.
   An unquoted AGE refers to the variable being created, not to the age basis. */
if foed_dag ne . and dato ne . then age=int(yrdif(foed_dag,dato,'AGE'));
run;

/*SORT DATA*/
proc sort data=immi; by pnr year; run;
proc sort data=bef1; by pnr year; run;
proc sort data=bef2; by pnr year; run;
proc sort data=bef3; by pnr year; run;

/* %var(var=): statements for use inside a DATA step. Sets <var> to the first
   non-missing of the numeric variables <var>1, <var>2, <var>3 and <var>_nr to the
   suffix that supplied the value (missing when all three are missing).
   <var> is created before <var>_nr; later steps rely on this variable order. */
%macro var(var=);
&var.=&var.1; &var._nr=1;
if &var. =. then do;
&var.=&var.2; &var._nr=2;
end;
if &var. =. then do;
&var.=&var.3; &var._nr=3;
end;
if &var. = . then &var._nr=.;
%mend;

/* %varchar(var=): same as %var, but tests for a blank value. */
%macro varchar(var=);
&var.=&var.1; &var._nr=1;
if &var. = "" then do;
&var.=&var.2; &var._nr=2;
end;
if &var. ="" then do;
&var.=&var.3; &var._nr=3;
end;
if &var. = "" then &var._nr=.;
%mend;

/* ADD MUNICIPALITY INFO FROM JANUARY 1ST YEAR AFTER IMMIGRATION DATE. IF INFO IS MISSING FIRST YEAR, THEN MUNICIPALITY TWO YEARS AFTER IS USED.
IF THIS IS MISSING THEN INFO 3 YEARS AFTER IS USED. SAME PROCEDURE IS USED FOR FAMILY_ID, SPOUSE-ID AND CITIZENSHIP AT ARRIVAL.*/
data immi2;
/* Both renames are given in one parenthesised RENAME= list (the documented form). */
merge immi (in=a rename=(ie_type=ie_type_recent opr_land=opr_land_recent))
bef1 (keep=pnr year KOM_BEF1 bopikom_bef1 opgikom_bef1 familie_id1 statsb1 efalle1)
bef2 (keep=pnr year KOM_BEF2 bopikom_bef2 opgikom_bef2 familie_id2 statsb2 efalle2)
bef3 (keep=pnr year KOM_BEF3 bopikom_bef3 opgikom_bef3 familie_id3 statsb3 efalle3);
by pnr year;
if a;
%var(var=STATSB);
%varchar(var=KOM_BEF);
%varchar(var=bopikom_bef);
%varchar(var=opgikom_bef);
%varchar(var=FAMILIE_ID);
%varchar(var=EFALLE);
/* Citizenship is only accepted when observed one or two years after entry. */
if statsb_nr=3 then statsb=.;
drop kom_bef1 kom_bef2 kom_bef3 bopikom_bef1 bopikom_bef2 bopikom_bef3 opgikom_bef1 opgikom_bef2 opgikom_bef3 familie_id1 familie_id2 familie_id3 statsb1 statsb2 statsb3
familie_id_nr statsb_nr efalle_nr efalle1 efalle2 efalle3;
run;

/* KEEP FIRST AND SECOND INTERNAL MIGRATION*/
proc sort data=flyt;
    by pnr bop_vfra;
run;

/* Second record per person: FLYT numbers the records within each pnr (1, 2, ...)
   and only the record numbered 2 is written. */
data flyt2;
    set flyt;
    by pnr;
    if first.pnr then flyt = 0;
    flyt + 1;
    if flyt = 2;
run;

/* First record per person. */
data flyt1;
    set flyt;
    by pnr;
    if not first.pnr then delete;
run;


 /* Order the three inputs of the next merge by person id. */
proc sort data=flyt1; by pnr; run;
proc sort data=flyt2; by pnr; run;
proc sort data=immi2; by pnr; run;


/* Value labels for the variable recording where the first address came from. */
proc format;
    value source
        1 = "BEF"
        2 = "First move (FLYT)"
        3 = "Second move (FLYT)";
run;

/*MERGE DATASETS AND CREATE LABELS. DEFINE FIRST MUNICIPALITY: USE FIRST MUNICIPALITY FROM BEF 1, 2 OR 3 YEARS AFTER IMMIGRATION. IF AN INTERNAL MIGRATION IS RECORDED PRIOR TO THIS
DATE, THIS MUNICIPALITY OF ORIGIN IS USED AS FIRST MUNICIPALITY. IF MUNICIPALITY IS MISSING FIRST THREE YEARS AFTER IMMIGRATION, THEN MUNICIPALITY OF ORIGIN AT FIRST INTERNAL
MIGRATION IS USED. INFO FROM FIRST INTERNAL MIGRATION IS NOT USED IF THE INTERNAL MIGRATION YEAR IS PRIOR TO YEAR OF IMMIGRATION.
IF A MOVE WITHIN THE SAME MUNICIPALITY IS REGISTERED FROM THE FIRST ADDRESS WITHIN 3 MONTHS AFTER IMMIGRATION, THIS ADDRESS IS CONSIDERED THE PERMANENT ADDRESS.
VERY EARLY MOVES COULD INDICATE THAT THE FIRST ADDRESS WAS TEMPORARY HOUSING. INFORMATION FROM SECOND ADDRESS IN FLYT IS ADDED*/
/* Inputs: immi2 (immigration records with BEF info), flyt1 (first FLYT record per
   person) and flyt2 (second FLYT record per person), merged by pnr.
   Only persons present in immi2 are kept.
   Variables of flyt1/flyt2 that are not renamed here share their names across the
   inputs, so the right-most dataset supplies the value.
   NOTE(review): confirm which un-renamed variables flyt1 and flyt2 carry. */
data immigration ;
merge 	immi2 (in=a) flyt1 (drop=adresse_id bop_vtil rename=(bop_vfra=haend_dato kom=gl_kom))
		flyt2 (drop=adresse_id bop_vtil flyt rename=(bop_vfra=haend_dato_2 kom=municipality_org_2 bopikom=bopikom_2));
by pnr;
if a;
/* Date at which the BEF municipality was measured. With the default alignment
   INTNX returns the first day of the year that lies kom_bef_nr years after dato.
   NOTE(review): kom_bef is compared with numeric missing here and multiplied by 1
   below - confirm whether it is numeric or character in immi2. */
if kom_bef ne . and kom_bef_nr=1 then dato_kom_bef=intnx('year',dato,1);
if kom_bef ne . and kom_bef_nr=2 then dato_kom_bef=intnx('year',dato,2);
if kom_bef ne . and kom_bef_nr=3 then dato_kom_bef=intnx('year',dato,3);
format dato ddmmyy. tilladelsesdato ddmmyy. haend_dato ddmmyy. dato_kom_bef ddmmyy. ;

*FIRST ADDRESS INFORMATION FROM BEF;
first_municipality=kom_bef*1;
mun_year=kom_bef_nr;
opgikom_year=opgikom_bef_nr;
first_bopikom=bopikom_bef;
first_opgikom=opgikom_bef;
first_municipality_bef=kom_bef*1;
if kom_bef not eq . then first_bopikom_source=1;

*USE FIRST ADDRESS INFO FROM FLYT IF BEF INFO IS MISSING;
/* Only used when the FLYT record is dated in the immigration year or later. */
if kom_bef eq . and year(haend_dato) >= year then do;
first_municipality=gl_kom*1;
first_bopikom=bopikom;
first_bopikom_source=2;
mun_year=year(haend_dato)-year;
end;

*USE FIRST ADDRESS INFO FROM FLYT IF IT IS RECORDED PRIOR TO BEF INFO;
/* Overrides the BEF values when the first FLYT date is not later than the BEF
   measurement date and not earlier than the immigration year. */
if haend_dato <= dato_kom_bef and year(haend_dato) >= year and haend_dato ne . and dato_kom_bef ne . then do;
first_municipality=gl_kom*1; 
first_bopikom=bopikom;
first_bopikom_source=2;
mun_year=year(haend_dato)-year;
end;

*REPLACE INFO WITH SECOND MOVE IF THE SECOND MOVE OCCURS LESS THAN 3 MONTHS AFTER IMMIGRATION WITHIN SAME MUNICIPALITY;
/* NOTE(review): the comments say 3 months, but the cutoff is intnx('month',dato,4),
   which with the default alignment is the first day of the fourth month after the
   immigration month (between 3 and 4 months after dato) - confirm which is intended. */
if haend_dato_2 <= intnx('month',dato,4) and haend_dato_2 ne . and first_municipality=municipality_org_2*1 then do; 
first_municipality=municipality_org_2*1; 
first_bopikom=bopikom_2;
first_bopikom_source=3;
mun_year=year(haend_dato_2)-year;
end;


*SAVE ORIGINAL MUNICIPALITY CODE;
municipality_org=first_municipality;

*LABELS;
label municipality_org="First municipality, original code" opgikom_year="Measurement year for opgikom (in years since migration)" 
first_opgikom="First opgikom in BEF" mun_year="Measurement year first_municipality (in years since immigration)"
first_bopikom="First permanent adress (bopikom) (second address in FLYT if move occured within 3 months of immigration)" 
municipality_org_2="Municipality, second address" 
bopikom_2="Second address (bopikom)" haend_dato_2="Move-in date, second address"
first_bopikom_source="Source for first_bopikom and first_municipality" first_municipality_bef="First Municipality BEF (matches first opgikom)";
format first_bopikom_source source.;

/* The two "--" lists are positional: they drop every variable stored between the
   two named variables, so the result depends on the order in which variables were
   created in immi2, flyt1, flyt2 and in this step. */
drop haend_dato_2 bopikom_2 municipality_org_2 kom_bef--opgikom_bef_nr haend_dato--dato_kom_bef;
run;



/* Family-reunified immigrants (kategori 5) with a known family id and entry year. */
data unified;
    set immigration;
    if kategori ne 5 then delete;
    if familie_id = ' ' or year = . then delete;
run;

/* Order both inputs by person id. */
proc sort data=bef;   by pnr; run;
proc sort data=immi2; by pnr; run;

/* Panel of refugees (kategori 1) used to match family-reunified persons by family id.
   One row per BEF record of each refugee. Rows without a family id or year, and
   rows with ie_type_recent equal to 1, are removed. Person-level variables get
   the suffix _r on output. */
data refugee_panel (drop=efalle
                    rename=(pnr=pnr_r dato=dato_r tilladelsesdato=tilladelsesdato_r
                            grundlag=grundlag_r kategori=kategori_r age=age_r));
    merge immi2 (keep=pnr dato tilladelsesdato grundlag kategori opr_land_recent IE_TYPE_RECENT statsb efalle age in=a)
          bef   (drop=kom_BEF opr_land ie_type statsb efalle in=b);
    by pnr;
    if not b then delete;
    if kategori ne 1 then delete;
    refugee = 1;
    if familie_id = ' ' or year = . or ie_type_recent = 1 then delete;
    efalle_r = efalle*1;
    label efalle_r="Identifaction nr for spouse of the refugee";
run;

/*SORT DATA AND KEEP ONLY ONE REFUGEE PER FAMILY-ID*/
/* The first sort puts the earliest year and, within year, the oldest refugee first.
   NODUPKEY then keeps that first row of each family id. */
proc sort data=refugee_panel; by familie_id year descending age_r; run;
proc sort data=refugee_panel NODUPKEY; by familie_id; run;
proc sort data=unified; by familie_id year; run;


/*MATCH FAMILY-UNIFIED TO REFUGEES AND KEEP ONLY UNIFIED TO REFUGEES*/
/* refugee_panel carries foed_dag, koen, opr_land_recent, ie_type_recent and statsb
   under the same names as unified. In a MERGE the right-most dataset wins on the
   first row of each BY group, so without the DROP= below the first reunified person
   of every family would get the refugee's values and the ie_type_recent filter
   would test the refugee and not the reunified person. */
data unified_refugee;
merge unified (in=a)
refugee_panel (in=b drop=year foed_dag koen opr_land_recent ie_type_recent statsb);
by familie_id;
if year ne . and familie_id ne ' ' and ie_type_recent ne 1;
if a and refugee=1;
ref1=1;
label ref1='Unified to refugee (family_id)';
drop efalle_r;
run;

/*DEFINE UNIFIED TO REFUGEES BY 'FORKLAR' VARIABLE WHICH EXISTS FROM 2010*/
/* ref2 is 1 for kategori 5 records whose forklar code is one of 90, 126, 164, 181
   or 187, and 0 otherwise. Only the records with ref2 = 1 are written. */
data unified_refugee2;
    set immigration;
    ref2 = (kategori=5 and
            (forklar=126 or forklar=164 or forklar=187 or forklar=181 or forklar=90));
    if ref2 ne 1 then delete;
    label ref2='Unified to refugee (forklar)';
run;

/* Order both definitions of "unified to refugee" by person id. */
proc sort data=unified_refugee2; by pnr; run;
proc sort data=unified_refugee;  by pnr; run;

/* Combine the two definitions so the methods can be compared, and add the year of admission. */
data test_refugee;
    merge unified_refugee unified_refugee2;
    by pnr;
    year_admission = year(tilladelsesdato);
run;


/* Order both inputs by person id. */
proc sort data=immigration;  by pnr; run;
proc sort data=test_refugee; by pnr; run;

/* Add the ref1/ref2 indicators to the immigration data and derive three type codes
   (1 = refugee, 2 = family unified to refugee):
     type  uses ref1, type2 requires both ref1 and ref2, type3 uses ref2.
   Code 2 takes precedence over code 1. */
data refugees_and_family_unified;
    merge immigration (in=a) test_refugee (keep=pnr ref1 ref2 in=b);
    by pnr;

    if ref1=1 then type=2;
    else if kategori=1 then type=1;

    if ref1=1 and ref2=1 then type2=2;
    else if kategori=1 then type2=1;

    year_admission = year(tilladelsesdato);

    if ref2=1 then type3=2;
    else if kategori=1 then type3=1;
run;

/* Value labels for the different definitions of family-unified, defined in one PROC FORMAT. */
proc format;
    value type
        1 = 'Refugee'
        2 = 'Family unified to refugee (definitionen: e-family)';
    value type_both
        1 = 'Refugee'
        2 = 'Family unified to refugee (definition: e-family AND forklar)';
    value type_forklar
        1 = 'Refugee'
        2 = 'Family unified to refugee (definition: forklar)';
    value ref_type
        1 = 'Refugee'
        2 = 'Family unified to refugee';
run;


*KEY TO CONVERT OLD MUNICIPALITIES TO NEW;
/* WORK.KOMMUNE: one row per old municipality code. Each inline data line holds two
   numeric values read in list style: GL_kommune (old code) and NY_kommune (new code).
   Codes that do not change map to themselves. */
data kommune;
input GL_kommune NY_kommune;
cards;
101 101
147 147
151 151
153 153
155 155
157 157
159 159
161 161
163 163
165 165
167 167
169 169
171 240
173 173
175 175
181 230
183 183
185 185
187 187
189 190
201 201
205 230
207 190
208 210
209 250
211 260
213 270
215 270
217 217
219 219
221 260
223 223
225 250
227 210
229 250
231 219
233 219
235 240
237 240
251 350
253 253
255 265
257 350
259 259
261 350
263 265
265 265
267 259
269 269
271 336
301 326
303 340
305 306
307 370
309 326
311 330
313 320
315 316
317 326
319 326
321 316
323 326
325 330
327 306
329 329
331 330
333 330
335 340
337 340
339 316
341 316
343 306
345 316
351 320
353 370
355 360
357 370
359 360
361 390
363 360
365 390
367 360
369 376
371 376
373 370
375 376
377 390
379 360
381 360
383 360
385 320
387 376
389 336
391 376
393 370
395 376
397 390
400 400
401 400
403 400
405 400
407 400
409 400
411 411
421 420
423 480
425 430
427 479
429 410
431 430
433 420
435 479
437 420
439 440
441 440
443 492
445 410
447 440
449 450
451 410
461 461
471 480
473 430
475 482
477 430
479 479
481 482
483 480
485 420
487 482
489 450
491 420
493 492
495 450
497 430
499 420
501 540
503 580
505 550
507 540
509 510
511 510
513 540
515 510
517 550
519 580
521 550
523 540
525 510
527 575
529 580
531 550
533 540
535 540
537 540
539 580
541 550
543 510
545 580
551 530
553 573
555 573
557 561
559 575
561 561
563 563
565 530
567 561
569 575
571 561
573 573
575 575
577 573
601 615
603 630
605 621
607 607
609 615
611 630
613 766
615 615
617 630
619 766
621 621
623 621
625 756
627 630
629 621
631 630
651 657
653 756
655 760
657 657
659 760
661 661
663 756
665 665
667 760
669 760
671 671
673 665
675 671
677 657
679 661
681 760
683 661
685 657
701 706
703 746
705 740
707 707
709 710
711 710
713 710
715 746
717 710
719 846
721 706
723 730
725 707
727 727
729 730
731 730
733 706
735 707
737 746
739 706
741 741
743 740
745 746
747 707
749 740
751 751
761 791
763 791
765 787
767 710
769 791
771 740
773 773
775 791
777 779
779 779
781 779
783 779
785 787
787 787
789 791
791 791
793 791
801 846
803 849
805 810
807 810
809 820
811 849
813 813
815 846
817 851
819 860
821 860
823 846
825 825
827 820
829 860
831 851
833 840
835 849
837 851
839 860
841 813
843 840
845 840
847 813
849 849
851 851
861 820
901 901
903 903
905 905
907 907
909 909
911 911
913 913
915 915
917 917
919 919
921 921
923 923
925 925
927 927
929 929
941 941
951 951
953 953
961 961
999 999
;
run;

/* Write the old-to-new municipality key to a Stata file. */
proc export
    data=kommune
    outfile="&outputdata.\municipalitykey"
    dbms=dta
    replace;
run;



/* Name the old code first_municipality so the key can be merged onto the person data. */
data kommune;
    set kommune (rename=(gl_kommune=first_municipality));
run;

proc sort data=refugees_and_family_unified; by first_municipality; run;
proc sort data=kommune;                     by first_municipality; run;

/* Replace old municipality codes by new ones and build the refugee / family-unified indicator.
   first_municipality becomes the new code when the key has one and keeps the
   original code otherwise.
   ref_type: 2 = unified to refugee (ref1 = 1), 1 = refugee (kategori 1), 0 = other.
   Persons with ie_type_recent equal to 1 always get 0, and their ref1 is set to missing. */
data refugees_and_family_unified (rename=(first_mun=first_municipality));
    merge refugees_and_family_unified (in=a) kommune;
    by first_municipality;
    if not a then delete;

    if ny_kommune ne . then first_mun = ny_kommune;
    else first_mun = first_municipality;
    label first_mun ="First municipality of residence";
    drop first_municipality ny_kommune;

    if ie_type_recent ne 1 then do;
        if ref1=1 then ref_type = 2;
        else if kategori=1 then ref_type = 1;
        else ref_type = 0;
    end;
    else do;
        ref_type = 0;
        if ref1=1 then ref1 = .;
    end;
    format ref_type ref_type.;
run;

/* Load the code-to-name table with its key named first_municipality. */
data format;
    set format.kom_til_navn_mm_2007 (rename=(kom=first_municipality));
run;

proc sort data=format;                      by first_municipality; run;
proc sort data=refugees_and_family_unified; by first_municipality; run;

/* Attach the municipality name (navn) to every person record and label the variables. */
data refugees_and_family_unified;
    merge refugees_and_family_unified (in=a)
          format (keep=first_municipality navn);
    by first_municipality;
    if not a then delete;
    label
        foed_dag="Birthday"
        age="Age at immigration"
        mun_year="Year difference between first municipality and immigration"
        year_admission="Year of admission"
        first_municipality="First municipality of residence within 2 years"
        ref_type="Refugee (1) or family-reunified (2)"
        navn="Name of municipality"
        statsb="Citizenship at entry (found within 2 years of entry)";
run;


*DROP UNNECESSARY VARIABLES AND KEEP ONLY REFUGEES AND REUNIFIED 18-64 YEARS OLD.;
*SET MUNICIPALITY TO MISSING IF NOT KNOWN WITHIN 2 YEARS.;
/* The municipality name (navn) is already on refugees_and_family_unified from the
   preceding merge with FORMAT, so the data are read with SET; merging the same
   name table a second time adds nothing. */
data refugees_and_family_unified2;
set refugees_and_family_unified;
IF (REF_TYPE=1 OR REF_TYPE=2) AND AGE > 17 AND AGE < 65;
/* Municipality measured more than two years after immigration, or never: blank out
   every first-address variable. */
if mun_year > 2 or mun_year eq . then do;
first_municipality=.;
first_bopikom="";
first_opgikom="";
navn = '';
end;
/* Without a municipality name the measurement year is not reported either. */
if navn = '' then mun_year=.;
drop kom_bef:
type type2 type3 familie_id;
rename opr_land_recent=OPR_LAND ie_type_recent=IE_TYPE;
run;



 /* Write the final dataset to a Stata file. */
proc export
    data=refugees_and_family_unified2
    outfile="&outputdata.\Refugees1997_"
    dbms=dta
    replace;
run;







